#delimit;
local fileloc = "~/KMS_REPLICATION";
set logtype text;
capture log close templog;

log using `fileloc'/log_files/distances_to_zips.txt, name(templog) replace;
set more off;
clear all;

** NOTE: this uses the program "vincenty", a user-written program in Stata that calculates the distance between two points based on latitude and longitude;

**************************************************************************;
********************** ZIP CODE CENTROID INFORMATION**********************;
**************************************************************************;

** Zip code information is from ZCTA's, which is how the census does shape files. Very similar to major zips;
** zcta5.txt data file called is zip_dict.dct from https://www.census.gov/geo/maps-data/data/gazetteer2000.html;


**Adding zip code centroid information;
infile using `fileloc'/dicts/zip_dict.dct, clear;
** Keep only California;
drop if state ~= "CA";
drop state;
destring zip, replace force;
rename zip mother_zip;
drop if mother_zip == .;

gsort mother_zip, g(zip_sort);

sum zip_sort;
rename latitude zip_lat;
rename longitude zip_long;

save `fileloc'/data/location_data/zip_code_locations.dta, replace;

**************************************************************************;
********************** WEATHER STATION LOCATION **************************;
**************************************************************************;
** Taken from ftp://ftp.ncdc.noaa.gov/pub/data/inventories/ISH-HISTORY.TXT;

insheet using `fileloc'/data/location_data/station_list.csv, names clear;

drop if usaf == "end";
destring lev, g(elevation);
replace elevation = . if elevation == 99999;
replace elevation = -elevation if e == "-";
keep if ct == "US" & st == "CA";
destring at, g(latitude);
drop if latitude == 99999;
replace latitude = latitude/1000;
replace latitude = -latitude if l == "-";
destring on, g(longitude);
drop if longitude == 999999;
replace longitude = longitude/1000;
replace longitude = -longitude if v10 == "-";
keep usaf wban latitude longitude elevation;
drop if latitude == float(-99.999);
drop if longitude == float(-999.999);
drop if elevation == float(-99999);
drop if longitude < -180 | latitude > 180;
rename latitude weather_lat;
rename longitude weather_long;

save `fileloc'/data/location_data/weather_locations.dta, replace;

**************************************************************************;
********************** EMISSIONS STATION LOCATION ************************;
**************************************************************************;

** NOTE: file started as "location.dbf.exe" from CARB website – had to open in excel, save as csv for import into Stata;

insheet using `fileloc'/data/location_data/pollution_location.csv, names clear;

drop if latitude == . | longitude == .;
drop if latitude == 0 | longitude == 0;

keep location latitude longitude elevation;
rename latitude pollution_lat;
rename longitude pollution_long;

save `fileloc'/data/location_data/pollution_locations, replace;

**************************************************************************;
*********************** TRAFFIC STATION LOCATION *************************;
**************************************************************************;

** Traffic location data from latest location data – discussions with workers at PeMS indicates stations are unlikely to move once established;

clear;
tempfile traffic_location;
save `traffic_location', emptyok;

foreach section in 03 04 07 08 11 12 {;
	insheet using `fileloc'/data/location_data/d`section'_stations_2007.txt, clear;
	keep if length ~= .;	
	keep id latitude longitude length type fwy dir;
	rename latitude traffic_lat;
	rename longitude traffic_long;
	append using `traffic_location';
	save `traffic_location', replace;
};

sort id;
save `fileloc'/data/location_data/traffic_locations.dta, replace;

***************************************************************************
** CONSTRUCT MASTER FILE OF ALL DISTANCES BETWEEN ZIPS AND POLLUTION ******
**************************************************************************;

use `fileloc'/data/location_data/zip_code_locations.dta, clear;

sum zip_sort;
local maxzip = r(max);

preserve;

clear;

tempfile zip_to_pollution;
save `zip_to_pollution', emptyok;

quietly {;
	forvalues zip = 1/`maxzip' {;
		restore;
		preserve;
		keep if zip_sort == `zip';
		noisily di `maxzip' - `zip';
		local zip_lat = zip_lat;
		local zip_long = zip_long;
		local mother_zip = mother_zip;
		use `fileloc'/data/location_data/pollution_locations.dta, clear;
		keep location pollution_lat pollution_long elevation;
		gen zip_lat = `zip_lat';
		gen zip_long = `zip_long';
		gen mother_zip = `mother_zip';
		qui vincenty zip_lat zip_long pollution_lat pollution_long, hav(distance);
		keep location *lat *long mother_zip distance;
		append using `zip_to_pollution';
		save `zip_to_pollution', replace;
	};
};

restore, not;

gsort mother_zip, g(count);
sum count;
drop count;
sort mother_zip location;

save `fileloc'/data/location_data/pollution_to_zip_distance.dta, replace;


**************************************************************************;
**CONSTRUCT MASTER FILE OF ALL DISTANCES BETWEEN ZIPS AND TRAFFIC SENSORS*;
**************************************************************************;

use `fileloc'/data/location_data/zip_code_locations.dta, clear;

sum zip_sort;
local maxzip = r(max);

preserve;

clear;

di `maxzip';

tempfile zip_to_traffic;
save `zip_to_traffic', emptyok;

quietly {;
	forvalues zip = 1/`maxzip' {;
		restore;
		preserve;	
		keep if zip_sort == `zip';
		** Display progress;
		noisily di `maxzip' - `zip';
		local  zip_lat = zip_lat;
		local  zip_long = zip_long;
		local mother_zip = mother_zip;
		use `fileloc'/data/location_data/traffic_locations.dta, clear;
		gen zip_lat = float(`zip_lat');
		gen zip_long = float(`zip_long');
		gen mother_zip = `mother_zip';	
		qui vincenty zip_lat zip_long traffic_lat traffic_long, hav(distance);		
		keep id *lat *long mother_zip distance;
		append using `zip_to_traffic';
		save `zip_to_traffic', replace;
	};
};

restore, not;

keep mother_zip distance id;
order mother_zip id distance;

gsort mother_zip, g(count);
sum count;
drop count;
sort mother_zip id;

save `fileloc'/data/location_data/traffic_to_zip_distance.dta, replace;

**************************************************************************;
**CONSTRUCT MASTER FILE OF ALL DISTANCES BETWEEN ZIPS AND WEATHER SENSORS*;
**************************************************************************;

use `fileloc'/data/location_data/zip_code_locations.dta, clear;

sum zip_sort;
local maxzip = r(max);

preserve;

clear;

tempfile zip_to_weather;
save `zip_to_weather', emptyok;

quietly {;
	forvalues zip = 1/`maxzip' {;
		restore;
		preserve;
		keep if zip_sort == `zip';
		noisily di `maxzip' - `zip';		
		local zip_lat = zip_lat;
		local zip_long = zip_long;
		local mother_zip = mother_zip;
		use `fileloc'/data/location_data/weather_locations.dta, clear;
		gen zip_lat = `zip_lat';
		gen zip_long = `zip_long';
		gen mother_zip = `mother_zip';
		qui vincenty zip_lat zip_long weather_lat weather_long, hav(distance);
		keep usaf wban *lat *long mother_zip distance;
		append using `zip_to_weather';
		qui save `zip_to_weather', replace;			
	};
};

restore, not;
		
order mother_zip usaf wban distance;

gsort mother_zip, g(count);
sum count;
drop count;
sort mother_zip usaf wban;

save `fileloc'/data/location_data/weather_to_zip_distance.dta, replace;

log close templog;

clear all;
	
	
	
	